EWAS for MDD PRS


Methods

Subjects

From Generation Scotland,

  • Wave 1: N = 4977

  • Wave 2: N = 4312

DNA methylation data and technical covariates

Processed by Rosie, using EPIC array. M values.

Relatedness controlled for by regressing against GRM. Other regressors include batch and cell proportion.

Additional covariates

Ever smoke, pack years, age, sex and 20 methylation PCs.

Pipeline

From Mark, wiki.


EWAS Results

PRS pT=5e-8

PRS pT=5e-8 no MHC region

Number of significant CpGs associated with PRS

CpGs to nearest genome-wide significant locus

95% CI for distance to the nearest MDD risk locus:

  • Significant CpG: 36-3.14495510^{5}
  • Non-significant CpG: 1.16545510{5}-6.039677910{7}

Gene annotation

Pathway analysis

GO.result.sig=gometh(sig.cpg=fig.dat.GS.pT.5e_08$CpG[fig.dat.GS.pT.5e_08$p.adj<0.05], 
         all.cpg=fig.dat.GS.pT.5e_08$CpG, collection="GO",
              array.type = "EPIC") %>%
  .[order(.$P.DE,decreasing=F),] %>% 
  filter(.,FDR<0.05)
  
KEGG.result.sig=gometh(sig.cpg=fig.dat.GS.pT.5e_08$CpG[fig.dat.GS.pT.5e_08$p.adj<0.05], 
         all.cpg=fig.dat.GS.pT.5e_08$CpG, collection="KEGG",
              array.type = "EPIC") %>%
  .[order(.$P.DE,decreasing=F),] %>% 
  filter(.,FDR<0.05)

# pathway analysis for non-MHC probes
pathway.dat=fig.dat.GS.pT.5e_08 %>%
      mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No')) %>%
      filter(.,is.MHC == 'No')
GO.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="GO",
              array.type = "EPIC") %>%
  .[order(.$P.DE,decreasing=F),] 

KEGG.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="KEGG",
              array.type = "EPIC") %>%
  .[order(.$DE,decreasing=T),] 

GO results - all CpG

ONTOLOGY TERM N DE P.DE FDR
GO:0002428 BP antigen processing and presentation of peptide antigen via MHC class Ib 9 7 0 0
GO:0002476 BP antigen processing and presentation of endogenous peptide antigen via MHC class Ib 8 7 0 0
GO:0042611 CC MHC protein complex 23 15 0 0
GO:0042605 MF peptide antigen binding 23 12 0 0
GO:0071556 CC integral component of lumenal side of endoplasmic reticulum membrane 26 12 0 0
GO:0098553 CC lumenal side of endoplasmic reticulum membrane 26 12 0 0
GO:0002478 BP antigen processing and presentation of exogenous peptide antigen 164 18 0 0
GO:0019884 BP antigen processing and presentation of exogenous antigen 171 18 0 0
GO:0060333 BP interferon-gamma-mediated signaling pathway 86 15 0 0
GO:0048002 BP antigen processing and presentation of peptide antigen 177 18 0 0

KEGG results - all CpG

Description N DE P.DE FDR
path:hsa04940 Type I diabetes mellitus 41 14.0 0 0
path:hsa05330 Allograft rejection 34 13.0 0 0
path:hsa05332 Graft-versus-host disease 37 13.0 0 0
path:hsa04612 Antigen processing and presentation 66 15.0 0 0
path:hsa05320 Autoimmune thyroid disease 46 13.0 0 0
path:hsa05416 Viral myocarditis 55 13.0 0 0
path:hsa04145 Phagosome 140 16.0 0 0
path:hsa05150 Staphylococcus aureus infection 85 11.0 0 0
path:hsa05322 Systemic lupus erythematosus 112 12.5 0 0
path:hsa05310 Asthma 27 8.0 0 0

GO results - no MHC

ONTOLOGY TERM N DE P.DE FDR
GO:0099061 CC integral component of postsynaptic density membrane 50 2 0.0012904 1
GO:0099146 CC intrinsic component of postsynaptic density membrane 53 2 0.0013446 1
GO:0099060 CC integral component of postsynaptic specialization membrane 72 2 0.0021919 1
GO:0098948 CC intrinsic component of postsynaptic specialization membrane 75 2 0.0022612 1
GO:0072686 CC mitotic spindle 101 2 0.0023113 1
GO:0021691 BP cerebellar Purkinje cell layer maturation 2 1 0.0024461 1
GO:0021590 BP cerebellum maturation 3 1 0.0026990 1
GO:0021699 BP cerebellar cortex maturation 3 1 0.0026990 1
GO:1902412 BP regulation of mitotic cytokinesis 6 1 0.0027738 1
GO:0098839 CC postsynaptic density membrane 72 2 0.0028400 1
GO:0099634 CC postsynaptic specialization membrane 97 2 0.0043224 1
GO:0001093 MF TFIIB-class transcription factor binding 3 1 0.0045469 1
GO:0045666 BP positive regulation of neuron differentiation 353 3 0.0045874 1
GO:0021578 BP hindbrain maturation 6 1 0.0046396 1
GO:0070369 CC beta-catenin-TCF7L2 complex 3 1 0.0049050 1
GO:0021626 BP central nervous system maturation 7 1 0.0049102 1
GO:0043515 MF kinetochore binding 6 1 0.0053904 1
GO:0099055 CC integral component of postsynaptic membrane 113 2 0.0054125 1
GO:0098936 CC intrinsic component of postsynaptic membrane 118 2 0.0057831 1
GO:0051315 BP attachment of mitotic spindle microtubules to kinetochore 13 1 0.0058028 1

KEGG results - no MHC

Description N DE P.DE FDR
path:hsa04110 Cell cycle 121 1 0.0820648 1
path:hsa04114 Oocyte meiosis 114 1 0.0845197 1
path:hsa04514 Cell adhesion molecules 118 1 0.0975293 1
path:hsa04914 Progesterone-mediated oocyte maturation 86 1 0.0755825 1
path:hsa05166 Human T-cell leukemia virus 1 infection 190 1 0.1534105 1
path:hsa05203 Viral carcinogenesis 152 1 0.1253657 1
path:hsa00010 Glycolysis / Gluconeogenesis 64 0 1.0000000 1
path:hsa00020 Citrate cycle (TCA cycle) 28 0 1.0000000 1
path:hsa00030 Pentose phosphate pathway 25 0 1.0000000 1
path:hsa00040 Pentose and glucuronate interconversions 32 0 1.0000000 1
path:hsa00051 Fructose and mannose metabolism 32 0 1.0000000 1
path:hsa00052 Galactose metabolism 29 0 1.0000000 1
path:hsa00053 Ascorbate and aldarate metabolism 27 0 1.0000000 1
path:hsa00061 Fatty acid biosynthesis 15 0 1.0000000 1
path:hsa00062 Fatty acid elongation 26 0 1.0000000 1
path:hsa00071 Fatty acid degradation 42 0 1.0000000 1
path:hsa00072 Synthesis and degradation of ketone bodies 9 0 1.0000000 1
path:hsa00100 Steroid biosynthesis 18 0 1.0000000 1
path:hsa00120 Primary bile acid biosynthesis 17 0 1.0000000 1
path:hsa00130 Ubiquinone and other terpenoid-quinone biosynthesis 11 0 1.0000000 1

Summary of EWAS for MDD PRS pT = 5e-8

# 1. No. of significant CpGs in the MHC region
ewas.pTwgsig = ewas.pTwgsig %>%
  mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No'))
no.MHC.sig = table(ewas.pTwgsig$is.MHC[ewas.pTwgsig$p.adj<0.05])
no.MHC.sig
## 
##  No Yes 
##  37 688
# 2. Top 10 CpGs
top.10.sig = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  .[order(.$P.value,decreasing = F),] 
top.10.sig[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
84 cg03270340 NA NA 55.5951 2.4688 0 ++ 97.6 41.064 1 0 6 28891204 0 3551 yes Yes
29 cg00903577 NA NA 63.8098 2.8351 0 ++ 99.1 106.379 1 0 6 28831109 0 3099 yes Yes
354 cg12914966 NA NA 79.4726 3.6703 0 ++ 99.4 171.532 1 0 6 28830789 0 2779 yes Yes
470 cg16677399 NA NA 51.7185 2.4974 0 ++ 99.5 212.566 1 0 6 28830902 0 2892 yes Yes
389 cg14345882 NA NA -51.2371 2.5431 0 – 99.3 148.961 1 0 6 26364793 0 137 yes Yes
182 cg06608359 NA NA 59.9925 3.0263 0 ++ 97.1 34.908 1 0 6 28831300 0 3290 yes Yes
678 cg25643361 NA NA -44.1782 2.3263 0 – 98.1 53.559 1 0 6 26361389 0 41 yes Yes
721 cg27543291 NA NA -55.5490 2.9654 0 – 99.3 148.203 1 0 6 26367644 0 10 yes Yes
282 cg10046620 NA NA -41.8334 2.2423 0 – 98.5 66.796 1 0 6 27775042 0 14 yes Yes
233 cg08168669 NA NA -38.2512 2.0864 0 – 99.4 172.357 1 0 6 26367580 0 74 yes Yes
top10.max.p=max(top.10.sig$p.adj[1:10])

write.table(top.10.sig[1:10,],file=here::here('MR_meth_MDD/result/EWAS_MDDprs_Shen/top_CpG_withMHC.txt'),quote=F,col.names = T,row.names = F,sep='\t')

# 3. significant CpGs outside of the MHC region
top.10.sig.noMHC = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  filter(.,is.MHC=='No') %>%
  .[order(.$P.value,decreasing = F),] 
summary(top.10.sig.noMHC$p.adj)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 6.480e-06 1.515e-03 9.472e-03 7.844e-03 4.836e-02
# 4. Top 10 non-MHC CpGs
top.10.sig.noMHC[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
7 cg05590274 NA NA 16.9936 1.9302 0 ++ 94.4 17.997 1 0.0000221 11 113262625 0.0e+00 15822 yes No
26 cg17841099 NA NA -12.1025 1.5366 0 – 97.9 48.399 1 0.0000000 1 153940090 0.0e+00 21962506 yes No
35 cg26200795 NA NA 18.7156 2.4494 0 ++ 91.3 11.449 1 0.0007152 18 52895482 0.0e+00 5322 yes No
8 cg06276712 NA NA -12.4238 1.6339 0 – 96.6 29.631 1 0.0000001 7 12107011 0.0e+00 140319 yes No
27 cg17862947 NA NA 8.2523 1.1073 0 ++ 99.6 285.470 1 0.0000000 12 133086926 1.0e-07 11712199 yes No
6 cg05328885 NA NA 30.1986 4.1802 0 ++ 95.9 24.148 1 0.0000009 11 30943623 4.0e-07 1541 yes No
5 cg04317648 NA NA 11.9459 1.6968 0 ++ 86.6 7.456 1 0.0063220 1 8485376 1.5e-06 553 yes No
4 cg02403154 NA NA -14.5513 2.0861 0 – 0.0 0.386 1 0.5345000 18 52989223 2.3e-06 17013 yes No
22 cg14159747 NA NA -17.4837 2.5178 0 – 93.7 15.880 1 0.0000675 11 113255604 2.9e-06 22843 yes No
15 cg10154826 NA NA 36.6969 5.3737 0 ++ 96.1 25.769 1 0.0000004 6 17600994 6.5e-06 572321 yes No
top.10.sig.noMHC.sCHR=top.10.sig.noMHC[1:10,] %>%
  .[order(.$CHR),]
top10.noMHC.max.p=max(top.10.sig.noMHC$p.adj[1:10])

write.table(top.10.sig.noMHC[1:10,],file=here::here('MR_meth_MDD/result/EWAS_MDDprs_Shen/top_CpG_noMHC.txt'),quote=F,col.names = T,row.names = F,sep='\t')


EWAS replication in LBC & ALSPAC

Methods

Subjects

  • From LBC: N = 1330
  • From ALSPAC: N = 791

Data

LBC:

  • DNA methylation: Processed by Riccardo. 450k array. M-values.

  • Genetic: HRCv1.1 imputed data

  • LD reference: 1000G CEU sample, phase 3, hg19 genome build

ALSPAC:

  • DNA methylation: Processed by Riccardo. 450k array. M-values.

  • Genetic: HRCv1.1 imputed data

  • LD reference: 1000G CEU sample, phase 3, hg19 genome build

Covariates

Smoking status, age, sex, 20 methylation PCs, and cell proportions.

Pipeline

LBC:

From Mark, wiki.

Local copy (adapted to LBC data): /exports/igmm/eddie/GenScotDepression/shen/Tools/ewas_pipeline/

ALSPAC:

Run by Doretta

Results

Manhattan Plot

Correlation of beta for significant CpGs found in GS

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

Summry of replication analysis: on CpGs significant in GS (nCpG = 620)

  • Correlation between betas in GS and LBC for all CpGs: r = 0.795

  • Correlation between betas in GS and LBC for CpGs not in MHC region: r = 0.747

  • Betas remained in the same direction in LBC: 0%

  • CpGs nominally significant in LBC: 0%

Number of significant CpGs associated with PRS


SNP to CpG mapping


Heatmap

## png 
##   2
## # A tibble: 2 x 2
##   with_trans cis.p
## *      <dbl> <dbl>
## 1          0  4.61
## 2          1 14.4

Circular graph

## Using CpG as id variables


Mendelian Randomisation: CpG -> Brain cortical structure (discovery)


Methods

Datasets

  • Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.

  • Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)

Instruments

  • In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.

  • 615 had more than 30 genetic instruments available.

  • 18 CpGs left after ‘clumping’ (window = 3Mb, r=0.1) in the MHC region.

  • Finally, 20 CpGs with >=5 genetic instruments after data harmonisation entered MR.

MR methods

Results

25 CpGs tested:

Network plot: DNAm -> brain measures

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    filter(.,method==targetmethod) %>%
     mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='fdr')) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval))

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 


# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res,target.col='pval')
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total

Table: DNAm -> MDD

# Restore pcorr for MR Egger

mr.res.p = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                      ivw.padj=ivw.res$p.adj,ivw.p=ivw.res$pval,
                      mregger.padj=egger.res$p.adj,mregger.p=egger.res$pval,
                      wm.padj=wm.res$p.adj,wm.p=wm.res$pval) %>%
  mutate(sig_over2 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=2,1,0)) %>% 
  mutate(sig_over3 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=3,1,0))
mr.res.beta = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                         ivw.beta=ivw.res$b,
                      mregger.beta=egger.res$b,wm.beta=wm.res$b) %>%
  mutate(is.sign.consis = if_else(ivw.beta/mregger.beta>0&ivw.beta/wm.beta>0,1,0))

mr.summary=left_join(mr.res.p,mr.res.beta,by=c('exposure','outcome')) %>%
  mutate(is.valid = if_else(.$is.sign.consis==1&.$sig_over2==1,1,0)) %>%
  .[order(.$is.valid,decreasing = T),]

mr.sig = mr.summary %>%
  filter(.,is.valid==1)

QC.table = ivw.res %>%
  select(outcome,exposure,`Egger intercept`=egger_intercept,`Egger pval`=pval.1,
         Q,`Q pval`=Q_pval,Nsnp=nsnp)

mr.sig.output = mr.sig %>%
  select(-is.valid,-sig_over2,-is.sign.consis) %>%
  left_join(.,QC.table,by=c('exposure','outcome')) %>%
  .[order(.$exposure,.$outcome),] %>%
  select(ends_with('beta'),ends_with('.p'),ends_with('.padj'),
         everything()) %>%
  select(exposure,outcome,Nsnp,
         starts_with('ivw'),starts_with('wm'),starts_with('mregger'),
         everything()) %>%
  mutate_if(is.numeric, format, digits=3,nsmall = 0)

rownames(mr.sig.output)=NULL

saveRDS(mr.sig.output,file=here::here('MR_meth_MDD/result/GoDMC_MR_DNAm_to_MDD/mr_SigOutput.rds'))

# Make table
mr.sig.output %>%
  kbl(booktabs = TRUE) %>%
  kable_material(c("striped", "hover")) %>%
  add_header_above(c(" " = 3, "IVW" = 3, "WM" = 3, "MR-Egger" = 3, " " =1,
                     "QC Egger"=2,"QC Q (heterogeneity)"=2))
IVW
WM
MR-Egger
QC Egger
QC Q (heterogeneity)
exposure outcome Nsnp ivw.beta ivw.p ivw.padj wm.beta wm.p wm.padj mregger.beta mregger.p mregger.padj sig_over3 Egger intercept Egger pval Q Q pval
cg07519229 MDD 9 -0.0299 3.13e-09 1.10e-08 -0.0273 5.95e-07 9.25e-07 -0.0260 7.22e-02 1.52e-01 0 -0.00169 0.7329 15.02 0.058722
cg08344181 MDD 18 0.0449 6.42e-48 2.99e-47 0.0475 8.94e-49 6.26e-48 0.0566 8.78e-08 1.23e-06 1 -0.00786 0.0490 29.86 0.027400
cg13813247 MDD 9 -0.0188 3.03e-03 4.24e-03 -0.0184 1.20e-05 1.52e-05 -0.0243 7.85e-02 1.52e-01 0 0.00273 0.5923 27.08 0.000685
cg14159747 MDD 8 -0.0287 1.61e-05 3.75e-05 -0.0337 6.15e-08 1.23e-07 -0.0337 8.70e-02 1.52e-01 0 0.00248 0.7485 13.32 0.064678
cg17841099 MDD 11 -0.1028 2.57e-121 3.60e-120 -0.1073 4.22e-74 5.90e-73 -0.1221 9.54e-03 4.45e-02 1 0.00826 0.6147 4.58 0.917427
cg17862947 MDD 14 0.0418 1.18e-58 8.24e-58 0.0428 7.06e-48 3.30e-47 0.0523 8.56e-07 5.99e-06 1 -0.00825 0.0657 18.76 0.130827
cg17925084 MDD 6 0.0371 1.47e-04 2.29e-04 0.0404 4.80e-07 8.40e-07 0.0548 1.31e-01 2.03e-01 0 -0.00640 0.5459 12.77 0.025610
cg19624444 MDD 7 0.0257 2.76e-05 5.53e-05 0.0254 1.71e-09 4.78e-09 0.0279 4.52e-02 1.27e-01 0 -0.00125 0.7973 16.23 0.012589
cg19800032 MDD 5 0.0459 1.29e-04 2.26e-04 0.0478 3.56e-09 8.30e-09 0.0113 7.39e-01 7.39e-01 0 0.01125 0.3153 12.69 0.012901
cg23275840 MDD 5 0.0399 3.65e-06 1.02e-05 0.0375 3.22e-12 1.13e-11 0.0166 4.67e-01 5.95e-01 0 0.00942 0.2913 13.47 0.009195
ls.discovery = mr.sig.output[,c('exposure','outcome')]

Mendelian Randomisation: CpG -> Brain cortical structure (replication)


Methods

Datasets

  • Exposure: mQTL data from GS (N ~= 10,000).

  • Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)

Instruments

  • CpGs showed causal effect to any brain structural measure in the discovery analysis.

MR methods

Results

25 CpGs tested:

Network plot: DNAm -> MDD

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    filter(.,method==targetmethod) %>%
     mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='fdr')) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval))

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res,target.col='pval')
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total

Table: DNAm -> MDD

# Restore pcorr for MR Egger

mr.res.p = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                      ivw.padj=ivw.res$p.adj,ivw.p=ivw.res$pval,
                      mregger.padj=egger.res$p.adj,mregger.p=egger.res$pval,
                      wm.padj=wm.res$p.adj,wm.p=wm.res$pval) %>%
  mutate(sig_over2 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=2,1,0)) %>% 
  mutate(sig_over3 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=3,1,0))
mr.res.beta = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                         ivw.beta=ivw.res$b,
                      mregger.beta=egger.res$b,wm.beta=wm.res$b) %>%
  mutate(is.sign.consis = if_else(ivw.beta/mregger.beta>0&ivw.beta/wm.beta>0,1,0))


mr.summary=left_join(mr.res.p,mr.res.beta,by=c('exposure','outcome')) %>%
  mutate(is.valid = if_else(.$is.sign.consis==1&.$sig_over2==1,1,0)) %>%
  .[order(.$is.valid,decreasing = T),]

mr.sig = mr.summary %>%
  filter(.,is.valid==1)

QC.table = ivw.res %>%
  select(outcome,exposure,`Egger intercept`=egger_intercept,`Egger pval`=pval.1,
         Q,`Q pval`=Q_pval,Nsnp=nsnp)

mr.sig.output = mr.sig %>%
  select(-is.valid,-sig_over2,-is.sign.consis) %>%
  left_join(.,QC.table,by=c('exposure','outcome')) %>%
  left_join(ls.discovery,.,by=c('exposure','outcome')) %>%
  .[order(.$exposure,.$outcome),] %>%
  select(ends_with('beta'),ends_with('.p'),ends_with('.padj'),
         everything()) %>%
  select(exposure,outcome,Nsnp,
         starts_with('ivw'),starts_with('wm'),starts_with('mregger'),
         everything()) %>%
  mutate_if(is.numeric, format, digits=3,nsmall = 0)

colnames(mr.sig.output) = colnames(mr.sig.output) %>%
  gsub('ivw.','',.) %>%
  gsub('wm.','',.) %>%
  gsub('mregger.','',.)

rownames(mr.sig.output)=NULL

saveRDS(mr.sig.output,file=here::here('MR_meth_MDD/result/GS_MR_DNAm_to_MDD/mr_SigOutput.rds'))

# Make table
mr.sig.output %>%
  kbl(booktabs = TRUE) %>%
  kable_material(c("striped", "hover")) %>%
  add_header_above(c(" " = 3, "IVW" = 3, "WM" = 3, "MR-Egger" = 3, " " =1,
                     "QC Egger"=2,"QC Q (heterogeneity)"=2))
IVW
WM
MR-Egger
QC Egger
QC Q (heterogeneity)
exposure outcome Nsnp beta p padj beta p padj beta p padj sig_over3 Egger intercept Egger pval Q Q pval
cg07519229 MDD 15 -0.1343 7.55e-14 1.83e-13 -0.1292 1.38e-08 2.35e-08 -0.1851 0.000241 0.00205 1 4.44e-03 0.145 18.47 1.86e-01
cg08344181 MDD 18 0.6916 3.72e-17 1.05e-16 0.8601 1.62e-58 6.90e-58 0.6087 0.024919 0.04236 1 4.09e-03 0.725 100.74 6.49e-14
cg13813247 MDD 19 -0.1265 7.95e-24 2.70e-23 -0.1279 1.69e-15 5.73e-15 -0.1195 0.001258 0.00428 1 -7.88e-04 0.808 16.10 5.85e-01
cg14159747 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg17841099 MDD 11 -1.0219 2.48e-120 4.21e-119 -1.0586 5.81e-67 4.94e-66 -1.5454 0.001623 0.00460 1 2.22e-02 0.164 5.98 8.17e-01
cg17862947 MDD 11 0.9628 3.89e-106 3.31e-105 0.9423 4.91e-60 2.78e-59 0.7478 0.000724 0.00308 1 9.68e-03 0.166 5.75 8.36e-01
cg17925084 MDD 8 0.2106 8.95e-12 1.69e-11 0.1708 3.55e-07 5.49e-07 0.1492 0.031536 0.04874 1 4.91e-03 0.220 9.01 2.52e-01
cg19624444 MDD 10 0.0814 7.78e-08 1.20e-07 0.0762 4.54e-11 9.64e-11 0.0869 0.007024 0.01327 1 -1.16e-03 0.770 18.83 2.67e-02
cg19800032 MDD 8 0.1186 2.44e-02 2.77e-02 0.1017 5.21e-03 5.54e-03 0.0655 0.574526 0.69705 0 3.48e-03 0.598 26.57 3.98e-04
cg23275840 MDD 11 0.1993 1.44e-24 6.12e-24 0.1919 6.34e-14 1.54e-13 0.1982 0.004242 0.00901 1 9.52e-05 0.981 10.38 4.08e-01

Results

25 CpGs tested:

Network plot: MDD -> DNAm

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    filter(.,method==targetmethod) %>%
     mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='fdr')) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval))

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res,target.col='pval')
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total

Table: MDD -> DNAm

# Restore pcorr for MR Egger

mr.res.p = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                      ivw.padj=ivw.res$p.adj,ivw.p=ivw.res$pval,
                      mregger.padj=egger.res$p.adj,mregger.p=egger.res$pval,
                      wm.padj=wm.res$p.adj,wm.p=wm.res$pval) %>%
  mutate(sig_over2 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=2,1,0)) %>% 
  mutate(sig_over3 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=3,1,0))
mr.res.beta = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                         ivw.beta=ivw.res$b,
                      mregger.beta=egger.res$b,wm.beta=wm.res$b) %>%
  mutate(is.sign.consis = if_else(ivw.beta/mregger.beta>0&ivw.beta/wm.beta>0,1,0))


mr.summary=left_join(mr.res.p,mr.res.beta,by=c('exposure','outcome')) %>%
  mutate(is.valid = if_else(.$is.sign.consis==1&.$sig_over2==1,1,0)) %>%
  .[order(.$is.valid,decreasing = T),]

mr.sig = mr.summary %>%
  filter(.,is.valid==1)

QC.table = ivw.res %>%
  mutate(pEgger.adj = p.adjust(pval.1,method='fdr')) %>% 
  mutate(pQ.adj=p.adjust(Q_pval,method='fdr')) %>% 
  select(outcome,exposure,`Egger intercept`=egger_intercept,`Egger pval`=pval.1,
         pEgger.adj,
         Q,`Q pval`=Q_pval,pQ.adj, 
         Nsnp=nsnp)

mr.sig.output = mr.sig %>%
  select(-is.valid,-sig_over2,-is.sign.consis) %>%
  left_join(.,QC.table,by=c('exposure','outcome')) %>%
  left_join(ls.discovery,.,by=c('exposure','outcome')) %>%
  .[order(.$exposure,.$outcome),] %>%
  select(ends_with('beta'),ends_with('.p'),ends_with('.padj'),
         everything()) %>%
  select(exposure,outcome,Nsnp,
         starts_with('ivw'),starts_with('wm'),starts_with('mregger'),
         everything()) %>%
  mutate_if(is.numeric, format, digits=3,nsmall = 0)

colnames(mr.sig.output) = colnames(mr.sig.output) %>%
  gsub('ivw.','',.) %>%
  gsub('wm.','',.) %>%
  gsub('mregger.','',.)

rownames(mr.sig.output)=NULL

saveRDS(mr.sig.output,file=here::here('MR_meth_MDD/result/GS_MR_MDD_to_DNAm/mr_SigOutput.rds'))

# Make table
mr.sig.output %>%
  kbl(booktabs = TRUE) %>%
  kable_material(c("striped", "hover")) %>%
  add_header_above(c(" " = 3, "IVW" = 3, "WM" = 3, "MR-Egger" = 3, " " =1,
                     "QC Egger"=3,"QC Q (heterogeneity)"=3))
IVW
WM
MR-Egger
QC Egger
QC Q (heterogeneity)
exposure outcome Nsnp beta p padj beta p padj beta p padj sig_over3 Egger intercept Egger pval pEgger.adj Q Q pval pQ.adj
cg07519229 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg08344181 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg13813247 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg14159747 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg17841099 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg17862947 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg17925084 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg19624444 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg19800032 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg23275840 MDD NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

p plot for MR: discovery, replication and bidirectional MR

p.plot.mr = function(tmp.res,tmp.facetcol='exposure',tmp.ycol='exposure',tmp.title){
dat.fig = base::Reduce(function(x,y) rbind(x,y),tmp.res) %>%
  left_join(mr.sig.output[,c('exposure','outcome')],.,var=c('exposure','outcome')) %>%
  dplyr::rename(Beta=b,SE=se,Method=method) %>%
  #.[order(.$pval,decreasing = T),] %>%
  mutate(ord=1:nrow(.)) 

fig.p =
  ggplot(dat.fig, aes(x=reorder(get(tmp.ycol),ord), y=-log10(pval), color=Method)) +
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  #facet_grid(rows = vars(get(tmp.facetcol)))+
  theme(
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  scale_x_discrete(position='top')+
  geom_hline(yintercept=0,color = "black", size=0.3)+
  geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
  coord_flip()+
  ylab('-log10(p) for MR')+
  xlab('CpG') +
  #ylim(0,60)+
  ggtitle(tmp.title)

 return(fig.p)
}

# Discovery analysis
res = as.list(paste0('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GoDMC_MR_DNAm_to_MDD/Summary_DNAm_to_MDD.csv')) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F)
names(res)='MDD'
mr.sig.output = res[[1]] %>%#
  filter(method=='Inverse variance weighted') %>%
  .[order(.$pval,decreasing = T),] %>%
  select(exposure,outcome)
  
fig.p.discovery.mr = p.plot.mr(res,tmp.title='DNAm (GoDMC) to MDD')
## Joining, by = c("exposure", "outcome")
# Replication analysis
res = as.list(paste0('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_DNAm_to_MDD/Summary_DNAm_to_MDD.csv')) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F)
names(res)='MDD'
fig.p.replication.mr = p.plot.mr(res,tmp.title='DNAm (GS) to MDD')
## Joining, by = c("exposure", "outcome")
# Reversed direction MR
res = as.list(list.files('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_MDD_to_DNAm/',pattern='^Summary',full.names = T)) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F) %>%
  lapply(.,dplyr::rename,outcome=exposure,exposure=outcome)
names(res)=list.files('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_Brain_to_DNAm/',pattern='^Summary') %>%
  gsub('Summary_Brain_to_','',.) %>%
  gsub('.csv','',.)
fig.p.reverse.mr = p.plot.mr(res,tmp.title='MDD to DNAm (GS)')
## Joining, by = c("exposure", "outcome")
fig.total = ggarrange(fig.p.discovery.mr,fig.p.replication.mr,fig.p.reverse.mr,ncol=3,
                      common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 48,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_res_pplot.png'))
fig.total